
# remove everything
rm(list=ls())

# =================================================================================
# ---------------------------------------------------------------------
# Drop outliers in dataset "Data" according to the "v"th column
# at each percentage "p" at bottom and top. The default value of
# p is 0.005. v is a vector of numbers.
# ---------------------------------------------------------------------

outlier <- function(Data, v, p=0.005) {
  out <- c()
  for(i in v) {
    out <- rbind(out, quantile(Data[,i], probs=c(p, 1-p)))
  }
  j <- 1
  for(i in v) {
    Data <- Data[Data[,i]>out[j,1] & Data[,i]<out[j,2], ]; print(dim(Data))
    j <- j+1
  }
  return(Data)
}

# =================================================================================

# Load packages
library(foreign)
library(gmm)
library(lattice)

# read data industry-by-industry
Data <- read.dta('Ind39.dta')

Data <- outlier(Data, which(names(Data)=="sm"), p=0.005)

# Delete obs according to variable "Foreign"
Data <- subset(Data, F>=0 & F<=1 & !is.na(Province) & sm<=1)
# summary(Data)
names(Data)
r1 <- summary(exp(Data$y)); r2 <- summary(exp(Data$k)); r3 <- summary(exp(Data$l)); r4 <- summary(exp(Data$m)); 
r5 <- summary(Data$F); r6 <- summary(as.numeric(Data$F>0))
Table <- rbind(r1, r2, r3, r4, r5, r6); Table


# ========================
# histogram
histogram(Data$F, col = 'grey', main = "(a)", xlab = "Foreign Equity", ylab="Relative Frequency (%)", freq = FALSE, breaks = 10, scales = list(cex=1))
histogram(Data$F[Data$F>0], col = 'grey', main = "(b)", xlab = "Foreign Equity", ylab="Relative Frequency (%)", freq = FALSE, breaks = 10, scales = list(cex=1))

# ========================
# map

library(maps)
library(mapdata)
library(maptools)

location <- readShapePoly('bou2_4p.shp')

dat <- read.csv('capital location.csv', header=TRUE)
dat2 = read.csv(text = "??,jd,wd
                Beijing,116.4666667,39.9
                Shanghai,121.4833333,31.23333333
                Guangzhou,113.25,23.13333333"
)



# Show the density of firms by deepth of shade

getColor=function(mapdata,provname,provcol,othercol)
{
  f=function(x,y) ifelse(x %in% y,which(y==x),0);
  colIndex=sapply(mapdata$NAME,f,provname);
  fgc=c(othercol,provcol)[colIndex+1];
  return(fgc);
}

as.character(na.omit(unique(location$NAME)));

provname=as.character(na.omit(unique(location$NAME)));

# -------------------------
# province level foreign equity
G <- Data$F; province <- Data$Province
order.prov <- c(10, 5, 30, 9, 8, 27, 7, 
                1, 6, 3, 26, 28, 29, 13,
                31, 19, 11, 12, 24, 18, 4,
                2, 14, 17, 15, 25, 22, 16,
                21, 32, 20, 33, 23)
G.prov <- c(aggregate(G, list(province), mean)$x,0,0); pop <- G.prov[order.prov]
# set pop>0.1 to 0.1
pop[pop>0.1] <- 0.1

provcol=rgb(red=255*(1-pop/max(pop)),green=255*(1-pop/max(pop)),blue=255*(1-pop/max(pop)), max=255, alpha=180);
othercol <- rgb(250, 250, 250, max=255, alpha=100)

par(mar=rep(0,4))
plot(location,col=getColor(location,provname,provcol,othercol),ylim = c(18, 54));
points(dat2$jd, dat2$wd, pch = 19 , col = "black")
text(dat2$jd, dat2$wd, dat2[, 1], cex = 1.2, col = 'black', pos = c(3, 3, 1))
axis(1, lwd = 0); axis(2, lwd = 0); axis(3, lwd = 0); axis(4, lwd = 0)

# -------------------------
# province level number of firms
N.prov <- c(data.frame(table(province))$Freq/length(province),0,0); pop2 <- N.prov[order.prov]
pop2[pop2>0.1] <- 0.1

provcol2=rgb(red=255*(1-pop2/max(pop2)),green=255*(1-pop2/max(pop2)),blue=255*(1-pop2/max(pop2)), max=255, alpha=180);
plot(location,col=getColor(location,provname,provcol2,othercol),ylim = c(18, 54));
points(dat2$jd, dat2$wd, pch = 19 , col = "black")
text(dat2$jd, dat2$wd, dat2[, 1], cex = 1.2, col = 'black', pos = c(3, 3, 1))
axis(1, lwd = 0); axis(2, lwd = 0); axis(3, lwd = 0); axis(4, lwd = 0)



